In [1]:
import requests as r
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import seaborn as sns
import time
import matplotlib.pyplot as plt
%matplotlib inline
sns.set()
import warnings
warnings.simplefilter('ignore') #ignore the warnings, not the errors
In [2]:
# happy to say everything will work right now, not like the last time

Why

image.png

ChessUrl

image.png

everyone is the same - no need for machine learning to predict image.png

image.png


image.png

image.png

image.png

image.png

Results

image-2.png

What makes me happy?

image.png


image-2.png

image.png

  • money ?
  • cars ?
  • films ?

Data

Types of data that can influence my mood:

  • money
  • sport data
  • activities I do

image.png

Sharing personal info:

  • data is touchy - mood is limited by category
  • IronHack is excluded
  • amounts are not in EUR; they are in something like YEN

Mood

ChessUrl

Mood App

Drawing Drawing Drawing

Mood Exploratory

image.png

Mood Wrangle

In [3]:
# Load the Daylio mood-diary export: one row per entry with the date,
# a numeric mood score, the mood label, and a '|'-separated activity list.
mood=pd.read_csv('python_data/Daylio_01.2019_07.2020_date.mood.mood_text.activities.csv')
In [4]:
# One-hot encode the '|'-separated activity list into indicator columns
# (spaces stripped first so names are clean), kept next to date and mood.
activity_flags = mood.activities.str.replace(' ', '').str.get_dummies(sep='|')
mood = pd.concat([mood[['full_date', 'mood_num', 'mood']], activity_flags], axis=1)
In [5]:
# Keep only the columns I feel comfortable sharing, then give them
# shorter, more readable names — one chained step, no inplace mutation.

shareable_cols = ['full_date', 'mood_num', 'mood', '4pda/search', 'biking(20m+)',
                  'cleaning', 'cook(nomicrow)', 'daysleep', 'drums(GHincluded)',
                  'eatingout', 'films', 'friends', 'gaming', 'italiano', 'laundry',
                  'maladie', 'party/bar', 'people(1h+)', 'personalwork',
                  'reading', 'shopping(big)', 'shopping(courses)', 'sport',
                  'stressventre', 'sun(onskin)', 'television', 'travel',
                  'volley', 'walking(30m+)', 'workplacework']

readable_names = {'full_date': 'date',
                  'mood_num': 'mood_float',
                  'sun(onskin)': 'sunnyday',
                  'cook(nomicrow)': 'cook',
                  'biking(20m+)': 'biking',
                  'drums(GHincluded)': 'drums',
                  'walking(30m+)': 'walking',
                  '4pda/search': 'repairing',
                  'volley': 'volley_ball',
                  'shopping(courses)': 'supermarket',
                  'people(1h+)': 'people',
                  'maladie': 'illness',
                  'shopping(big)': 'shopping',
                  'stressventre': 'stress'}

mood = mood[shareable_cols].rename(columns=readable_names)
In [6]:
# Parse dates, then order the diary chronologically with a clean index.
mood.date = pd.to_datetime(mood.date)
mood = mood.sort_values(by='date', ascending=True).reset_index(drop=True)
In [7]:
# Sanity check on the wrangled mood table.
mood.head()
Out[7]:
date mood_float mood repairing biking cleaning cook daysleep drums eatingout ... shopping supermarket sport stress sunnyday television travel volley_ball walking workplacework
0 2019-01-01 4.0 good 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 1
1 2019-01-02 3.0 meh 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 2019-01-02 4.0 good 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
3 2019-01-03 4.0 good 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 2019-01-04 4.0 good 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 30 columns

Money

ChessUrl

Money app

Drawing Drawing Drawing

Money Exploratory

image.png

Money Wrangle

In [8]:
# Monefy spending exports at three filter levels (per the file names):
#   money          - includes bills, restaurant card, and travel
#   money_sp       - no bills, no travel, with restaurant card
#   money_no_bills - no bills, with travel and restaurant card
money=pd.read_csv('python_data/Monefy_01.2019_07.2020_Bills_CarteResto_Voyage.csv')
money_sp=pd.read_csv('python_data/Monefy_01.2019_07.2020_NO_Bills_NO_Voyage_With_CarteResto.csv')
money_no_bills=pd.read_csv('python_data/Monefy_01.2019_07.2020_NO_Bills_With_Voyage_With_CarteResto.csv')
In [9]:
# Parse the date column of every money frame in place.
for frame in (money, money_sp, money_no_bills):
    frame.date = pd.to_datetime(frame.date)
In [10]:
# Peek at the spending table: one row per (date, category) purchase.
money.head()
Out[10]:
date category amount
0 2019-01-02 Bills 69040.0
1 2019-01-02 Other 1600.0
2 2019-01-02 Eating out 288.0
3 2019-01-03 Eating out 152.0
4 2019-01-03 House 5463.2

Steps

ChessUrl

Steps app

Drawing Drawing Drawing

Steps Exploratory

image-2.png

image.png

image.png

In [11]:
# Daily activity export from the Mi Band; count rows per year to see
# how much coverage each year has.
steps_lol=pd.read_csv('miband_data/ACTIVITY.csv')
steps_lol.date=pd.to_datetime(steps_lol.date)
steps_lol['year']=steps_lol.date.dt.year
steps_lol.year.value_counts()
Out[11]:
2019    700
2020    538
Name: year, dtype: int64

Wrangling

In [12]:
# Per-minute step counts from the Mi Band.
steps=pd.read_csv('miband_data/ACTIVITY_MINUTE.csv')
In [13]:
# Parse the date column so it can be pivoted/joined later.
steps.date=pd.to_datetime(steps.date)
In [14]:
# Peek at the per-minute steps table.
steps.head()
Out[14]:
date time steps
0 2020-07-07 00:53 54
1 2020-07-07 00:54 53
2 2020-07-07 00:56 24
3 2020-07-07 01:00 12
4 2020-07-07 01:07 15

Visualize

ChessUrl

Mood

In [15]:
import plotly.express as px

# Donut chart of the total mood score contributed by each mood label.
fig = px.pie(mood, values='mood_float', names='mood',
             title='Pie chart for Eldiias', height=400, hole=.3)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
Drawing Drawing

Money

Drawing Drawing
In [16]:
# Remind ourselves what the money table looks like before splitting it.
money.head()
Out[16]:
date category amount
0 2019-01-02 Bills 69040.0
1 2019-01-02 Other 1600.0
2 2019-01-02 Eating out 288.0
3 2019-01-03 Eating out 152.0
4 2019-01-03 House 5463.2

image-4.png

In [17]:
# Split spending into bills vs everything else, then compare their shares.
money['Bills'] = np.where(money['category'] == 'Bills', 'Bills', 'Not_Bills')

import plotly.express as px
fig = px.pie(money, values='amount', names='Bills', title='My spendings', height=400)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

image-3.png

Merge

ChessUrl

In [18]:
#pivot table to sum all the buyings per day
# (one row per date, total amount spent that day)

money.pivot_table(index='date', values='amount', aggfunc='sum').head()
Out[18]:
amount
date
2019-01-02 70928.0
2019-01-03 7695.2
2019-01-04 568.0
2019-01-05 846.4
2019-01-06 1116.0
In [19]:
# add daily spending totals to the mood dataset (left join on date;
# days without purchases get NaN, filled with 0 in the next cell)

mm=mood.join(money.pivot_table(index='date', values='amount', aggfunc='sum'), on='date')
In [20]:
# preparing table to analysis

mm['amount_spent']=mm.amount.fillna(0)    # days without purchases -> 0 spent
mm.drop(index=1, inplace=True)            #drop a duplicate row
# NOTE(review): index=1 is hard-coded (the duplicated 2019-01-02 entry visible
# in mood.head()); a re-exported dataset may put the duplicate elsewhere — verify.
mm.drop('amount', axis=1,inplace=True)    # superseded by amount_spent
mm.drop('mood', axis=1,inplace=True)      # keep only the numeric mood score
mm.reset_index(inplace=True, drop=True)
In [21]:
#pivot table to sum all the steps per day
# (per-minute rows aggregated to one daily total per date)

steps.pivot_table(index='date', values='steps', aggfunc='sum')
Out[21]:
steps
date
2017-05-27 4710
2017-05-28 8548
2017-05-29 10090
2017-05-30 4193
2017-05-31 12976
... ...
2020-09-29 4241
2020-09-30 3735
2020-10-01 3992
2020-10-02 3439
2020-10-03 6598

1196 rows × 1 columns

In [22]:
# Daily step totals, restricted to the period covered by the mood diary.
# .loc slicing on the (sorted) DatetimeIndex is inclusive on both ends,
# just like the previous query() version.
steps_pivot = steps.pivot_table(index='date', values='steps', aggfunc='sum')
steps_pivot = steps_pivot.loc['2019-01-01':'2020-07-31']
In [23]:
# add daily step totals to the mood+spending dataset

mm_st=mm.join(steps_pivot, on='date')
In [24]:
# Fill days without step data with the average step count.
# Assigning the result back (instead of fillna(..., inplace=True) on the
# column) avoids chained assignment, which is deprecated and silently
# ineffective under pandas Copy-on-Write.
mm_st['steps'] = mm_st['steps'].fillna(mm_st['steps'].mean())
In [25]:
# First rows of the combined mood + spending + steps table.
mm_st.head()
Out[25]:
date mood_float repairing biking cleaning cook daysleep drums eatingout films ... sport stress sunnyday television travel volley_ball walking workplacework amount_spent steps
0 2019-01-01 4.0 0 0 0 0 0 0 0 0 ... 0 0 0 1 0 0 0 1 0.0 17308.0
1 2019-01-02 4.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 70928.0 16077.0
2 2019-01-03 4.0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 7695.2 6392.0
3 2019-01-04 4.0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 568.0 5222.0
4 2019-01-05 3.0 0 0 0 0 0 0 0 1 ... 0 0 0 1 0 0 1 0 846.4 13894.0

5 rows × 31 columns

Analyze

ChessUrl

Activity influence

In [26]:
# Full mood+spending table (581 rows; .head() would be lighter to render).
mm
Out[26]:
date mood_float repairing biking cleaning cook daysleep drums eatingout films ... supermarket sport stress sunnyday television travel volley_ball walking workplacework amount_spent
0 2019-01-01 4.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 1 0.0
1 2019-01-02 4.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 70928.0
2 2019-01-03 4.0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 7695.2
3 2019-01-04 4.0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 568.0
4 2019-01-05 3.0 0 0 0 0 0 0 0 1 ... 0 0 0 0 1 0 0 1 0 846.4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
576 2020-07-27 4.5 0 1 0 0 0 0 1 0 ... 0 1 0 1 1 0 1 1 1 2199.2
577 2020-07-28 3.5 0 1 0 1 0 0 0 0 ... 0 1 0 1 1 0 1 1 1 1760.0
578 2020-07-29 4.0 0 1 0 1 0 0 1 0 ... 0 1 0 1 0 0 1 1 1 80.0
579 2020-07-30 4.0 0 1 0 0 0 0 1 0 ... 0 1 0 1 1 0 1 1 1 964.0
580 2020-07-31 4.0 0 0 0 1 0 0 0 0 ... 1 0 0 1 1 0 0 0 1 400.0

581 rows × 30 columns

In [27]:
# recognize if an activity adds something to my mood:
# mean mood on days with sport (index 1) vs days without (index 0)

mm.pivot_table(index='sport', values='mood_float', aggfunc='mean')

#show number of rows
Out[27]:
mood_float
sport
0 3.960591
1 4.091429
In [28]:
#let's get a table of that
#specify columns

cols=['repairing', 'biking', 'cleaning', 'cook',
       'daysleep', 'drums', 'eatingout', 'films', 'friends', 'gaming',
       'italiano', 'laundry', 'illness', 'party/bar', 'people', 'personalwork',
       'reading', 'shopping', 'supermarket', 'sport', 'stress',
       'sunnyday', 'television', 'travel', 'volley_ball', 'walking',
       'workplacework']

# Activity impact on mood: mean mood on days WITH the activity minus mean
# mood on days WITHOUT it, scaled x100.  Collect records in a list and build
# the frame once — DataFrame.append was removed in pandas 2.0 and growing a
# frame row-by-row is quadratic anyway.
records = []
for col in cols:
    by_activity = mm.pivot_table(index=col, values='mood_float', aggfunc='mean').mood_float
    records.append({'Activity': col,
                    'Mood_bonus': (by_activity[1] - by_activity[0]) * 100})
mood_inc = pd.DataFrame(records)

# How often each activity occurs, as % of logged days.
mood_occ = mm[cols].mean() * 100

# Tidy frame of occurrences.  (The old version's rename(columns={'0': ...})
# never matched — the column label was the int 0, not the string '0' — and
# needed a manual copy as a workaround; naming the Series first avoids that.)
mood_df = mood_occ.rename('Mood_occ').reset_index().rename(columns={'index': 'Activity'})

# Merge bonus and occurrence, sort by impact, round for display.
mood_inc_occ = (mood_inc.merge(mood_df, on='Activity')
                [['Activity', 'Mood_bonus', 'Mood_occ']]
                .sort_values(by='Mood_bonus', ascending=False)
                .round(1))
In [29]:
# Top activities by mood bonus (with how often they occur).
#mood_inc
#mood_occ
mood_inc_occ.head(6)
Out[29]:
Activity Mood_bonus Mood_occ
14 people 65.1 89.7
5 drums 45.4 2.9
6 eatingout 40.3 11.9
8 friends 39.6 51.1
25 walking 32.6 66.1
21 sunnyday 31.3 50.1

ChessUrl

Mood by activity

image.png

Drawing Drawing

image.png

Do I need to walk to be happier ?

In [30]:
# What amount of steps in a day makes me happy?
# (average daily steps at each mood level)
mm_st.pivot_table(index='mood_float', values='steps', aggfunc='mean')
Out[30]:
steps
mood_float
2.0 7712.756433
3.0 11394.679740
3.5 7109.666667
4.0 11479.930916
4.5 8891.984872
5.0 13494.682508
In [31]:
# Bar chart: average daily steps at each mood level.
steps_by_mood = mm_st.pivot_table(index='mood_float', values='steps', aggfunc='mean')
ax = steps_by_mood.plot(kind='bar', xlabel='Mood', ylabel='Steps', figsize=(12, 8))
ax.legend(['I like to walk']);

How much money spent makes me happy ?

ChessUrl

In [32]:
# What amount of money spent in a day makes me happy?
# (average daily spend at each mood level)
mm.pivot_table(index='mood_float', values='amount_spent', aggfunc='mean')
Out[32]:
amount_spent
mood_float
2.0 12162.000000
3.0 6203.915385
3.5 1014.933333
4.0 5196.104727
4.5 2896.957377
5.0 4157.074510
In [33]:
# Bar chart: average daily spend at each mood level.
spend_by_mood = mm.pivot_table(index='mood_float', values='amount_spent', aggfunc='mean')
ax = spend_by_mood.plot(kind='bar', xlabel='Mood', ylabel='Money', figsize=(12, 8))
ax.legend(['Does anyone need a trendline here?']);

Predict

ChessUrl

In [34]:
# Calendar features derived from the date.
mm['weekday'] = mm.date.dt.weekday
mm['month'] = mm.date.dt.month
mm['day_of_month'] = mm.date.dt.day
mm['weekend'] = (mm['weekday'] >= 5).astype(int)  # 1 = Sat/Sun, 0 = Mon-Fri
In [35]:
# Build a normalized copy for modelling: z-score the two continuous
# columns (mood and daily spend) and drop the raw date.
mmn = mm.copy()
for col in ('mood_float', 'amount_spent'):
    mmn[col] = (mm[col] - mm[col].mean()) / mm[col].std()
mmn = mmn.drop('date', axis=1)

Start of Eldiias crazy coding

In [36]:
# Target: normalized mood; features: every other column.
y=mmn.mood_float
X=mmn.drop('mood_float',axis=1)
In [37]:
# Project the features onto the first 7 principal components.
# NOTE(review): the binary activity flags are mixed with unscaled calendar
# columns here — the near-1.0 loadings in pca.components_ below suggest a
# few raw-scale columns dominate; consider standardizing X first.
from sklearn.decomposition import PCA
pca=PCA(7)
pca.fit(X)
x_new=pca.transform(X)
In [38]:
# Fraction of total variance retained by the 7 components.
pca.explained_variance_ratio_.sum()
Out[38]:
0.9759730611175594
In [39]:
# NOTE(review): OLS and add_constant are imported but never used below —
# leftover from an abandoned regression attempt; safe to delete.
from statsmodels.api import OLS
from statsmodels.api import add_constant
In [40]:
# Classifier used for the mood-prediction experiment below.
from sklearn.tree import DecisionTreeClassifier
In [41]:
# Inspect the normalized target.
y
Out[41]:
0      0.000000
1      0.000000
2      0.000000
3      0.000000
4     -1.512603
         ...   
576    0.756302
577   -0.756302
578    0.000000
579    0.000000
580    0.000000
Name: mood_float, Length: 581, dtype: float64
In [42]:
# Fit a decision tree on the PCA features; the target is mood*2 cast to int
# so each half-step mood level becomes a discrete class.
# random_state pins the tie-breaking among equally good splits, making the
# notebook reproducible under Restart & Run All.
kkk=DecisionTreeClassifier(random_state=0).fit(x_new,(mm.mood_float*2).astype(int))
In [43]:
# Compare the predicted class distribution with the actual one.
plt.hist(kkk.predict(x_new))
plt.show()
(mm.mood_float*2).astype(int).hist()
Out[43]:
<AxesSubplot:>
In [44]:
# The six distinct mood levels present in the diary.
mm.mood_float.unique()
Out[44]:
array([4. , 3. , 5. , 2. , 3.5, 4.5])
In [45]:
# Scratch: mood rescaled to 0-1 (not used anywhere later).
mm.mood_float/5
Out[45]:
0      0.8
1      0.8
2      0.8
3      0.8
4      0.6
      ... 
576    0.9
577    0.7
578    0.8
579    0.8
580    0.8
Name: mood_float, Length: 581, dtype: float64
In [46]:
from sklearn.metrics import r2_score
In [47]:
# NOTE(review): this scores the tree on its own training data — an unpruned
# decision tree memorizes the training set, so the 1.0 below says nothing
# about generalization.  Use a train/test split (and a classification metric
# such as accuracy rather than r2 on class labels) for a real estimate.
r2_score((mm.mood_float*2).astype(int), kkk.predict(x_new))
Out[47]:
1.0
In [48]:
from sklearn.metrics import confusion_matrix 
In [49]:
#that thing says that I have 100% SUPER PREDICTION
# NOTE(review): perfectly diagonal only because it's evaluated on the
# training data — the tree has memorized it; hold out a test set to see
# the real confusion matrix.

confusion_matrix((mm.mood_float*2).astype(int), kkk.predict(x_new))
Out[49]:
array([[  6,   0,   0,   0,   0,   0],
       [  0, 104,   0,   0,   0,   0],
       [  0,   0,  33,   0,   0,   0],
       [  0,   0,   0, 275,   0,   0],
       [  0,   0,   0,   0,  61,   0],
       [  0,   0,   0,   0,   0, 102]], dtype=int64)
In [50]:
# PCA loadings (rows = components, columns = input features).
# The ~0.99 entries suggest single unscaled columns dominate whole
# components — presumably the raw-scale calendar/amount features; verify.
pca.components_
Out[50]:
array([[ 4.79235213e-04,  1.41878190e-03, -2.07385097e-04,
         1.45410195e-04, -1.85148347e-03, -2.05383485e-03,
        -2.10059362e-03, -1.17325225e-03, -2.08241706e-03,
         5.83447174e-03, -1.69480954e-04,  1.44092469e-03,
         2.28837915e-04, -1.36161074e-03,  6.24554273e-04,
         1.43217764e-03, -4.31193444e-04, -1.11321704e-03,
         1.00477301e-03,  3.44734312e-03, -2.68140814e-03,
         9.49746630e-03,  6.43489377e-03,  1.36998592e-03,
         1.93463107e-03,  5.70932061e-03,  3.57664159e-03,
        -2.60112181e-02, -1.37313159e-04,  3.45142702e-03,
         9.99523270e-01, -1.41240636e-04],
       [ 9.50188128e-03, -2.22633916e-02,  2.27564477e-03,
        -4.23493948e-03,  1.21717666e-02, -6.48023176e-03,
        -9.35261470e-03,  1.32595510e-02,  9.22460586e-03,
        -1.76204257e-03,  1.21437294e-02,  3.41158781e-03,
        -4.37479579e-04, -8.30573122e-03,  1.09278794e-02,
        -3.99020533e-02,  1.34096661e-02, -7.00718548e-04,
        -2.58561712e-02, -3.57472924e-03, -3.84738469e-03,
         2.86755466e-03,  1.55198146e-02,  1.71353023e-02,
         2.86450092e-03,  3.40159846e-02,  4.31589398e-02,
         6.32142604e-03, -6.67075434e-04,  9.96196797e-01,
        -3.64830026e-03,  7.30549730e-04],
       [ 5.82220017e-04,  1.23452414e-03,  3.97380943e-03,
         1.52618688e-02, -1.26941182e-02, -9.52248418e-03,
        -1.55161882e-02, -1.35295558e-02, -2.47468873e-02,
         1.95006966e-02,  2.52539580e-03, -9.19639727e-03,
         6.38585396e-06, -8.13022845e-03, -8.31885897e-03,
        -2.49707361e-03,  6.44623538e-02, -4.35381149e-03,
         1.47583822e-02,  1.94398497e-02,  4.01393200e-03,
         8.50841955e-04,  1.35037122e-02, -3.47384530e-02,
         1.63766449e-02, -3.94876382e-02,  6.85812784e-02,
         4.82290692e-02, -9.75309877e-01, -2.12071950e-03,
         7.07204313e-04, -1.77711439e-01],
       [ 9.32560609e-03, -5.56518027e-02,  1.76280634e-02,
        -5.17554749e-02, -8.55725798e-03,  5.36107033e-03,
         1.08142565e-03, -1.66035259e-02, -3.95467571e-02,
        -2.57198278e-02,  3.92981020e-02, -4.90675471e-03,
         1.29897548e-05,  1.36021106e-02, -1.50623113e-02,
        -4.74807800e-02,  3.06834279e-02,  8.86908225e-03,
        -4.38680295e-02, -5.91221629e-02,  2.24911668e-02,
        -2.74978424e-02, -3.05340066e-02,  1.17164814e-02,
        -1.63873912e-02, -1.05069911e-02,  2.43877691e-02,
         9.86877151e-01,  4.96233643e-02, -1.10890730e-02,
         2.67466147e-02,  3.86411455e-03],
       [-4.91734173e-03,  2.82999932e-01,  1.81545105e-02,
         4.01027908e-01, -6.77162857e-03,  5.20734122e-02,
         1.15006569e-01,  8.20214751e-02,  2.83150263e-01,
         6.69922200e-02, -2.50919889e-01,  2.74210981e-02,
         2.54517657e-03,  1.90487329e-02, -8.49060195e-03,
         4.29929019e-01, -2.68735871e-02, -1.11191435e-02,
         2.61707099e-01,  3.26278461e-01, -7.06218848e-03,
         2.24141940e-01,  1.60703869e-01, -7.26721806e-02,
         5.97499809e-02,  2.39724906e-01, -2.52677619e-01,
         1.36359914e-01, -1.39297369e-02,  3.44646890e-02,
        -2.13683246e-03,  1.76691295e-02],
       [-3.01475596e-02, -1.95012515e-02, -1.00722430e-02,
        -1.90914738e-01, -5.67730044e-02, -8.78321446e-03,
         9.05158600e-02,  1.96996999e-03,  8.90337950e-02,
        -1.51092982e-01,  2.04060346e-01, -1.62909589e-02,
        -8.67799822e-05, -6.81077918e-02,  1.09847659e-01,
        -2.09930174e-02, -3.66198605e-01, -1.20323240e-03,
        -4.04969281e-02, -4.69967516e-02,  2.35135403e-02,
         4.13695886e-01, -4.76655007e-01,  3.46075450e-01,
        -9.09103141e-03,  3.91891439e-01, -1.98376889e-01,
        -5.86823508e-03, -9.23962329e-02, -6.36143669e-03,
        -1.74592009e-03,  5.10754068e-02],
       [-2.03018831e-02, -1.63154280e-01, -2.70055044e-03,
         9.55874286e-02, -3.52106969e-02, -3.21559601e-02,
        -1.90958485e-01,  3.40225618e-01, -6.19092867e-01,
         5.90132147e-02,  7.46449784e-02, -1.49072531e-02,
         2.89715905e-03, -3.90386719e-01, -2.53251242e-01,
         1.82509639e-02, -2.48744574e-02, -4.60156314e-03,
        -3.07943963e-02,  3.28510074e-01, -8.51316458e-02,
         2.61650670e-01,  7.90338513e-02,  1.37205419e-02,
         2.48390803e-03, -7.52247311e-03, -8.47791368e-02,
         9.53749368e-03,  2.54714161e-02, -1.87520132e-03,
        -5.66947079e-03, -1.49638503e-02]])
  1. PCA
  2. test_train_split
  3. DecisionTreeClassifier
  4. Confusion Matrix

  5. DecisionTreeClassifier based on data without transformations

End of Eldiias crazy coding

In [51]:
# apparently, I can get a decision tree

# function to predict my mood based on parameters - must work with a group of parameters

# I can run Monte Carlo test on 1 million lines to get the activities

ChessUrl

In [52]:
#recap

Learnings

  • data collection is very important

Improvements

  • how much money do I spend by walk
  • how much money do I spend by pulse
  • how much money do I spend by sleep
  • how much money do I spend by mood activity
  • how many hours do I sleep by spend
  • how many hours do I sleep by walk
  • how many hours do I sleep by mood
  • how much stress do I stress by stress (pulse)

  • sleep hours
  • pulse data
  • weather data as +
  • income data

If I were to start from scratch...

  • get to Russia faster

Bonus - sleep

In [53]:
# Mi Band sleep export: parse dates and drop exact duplicate rows
# (drop_duplicates defaults already mean subset=None, keep='first').
sleep = pd.read_csv('miband_data/SLEEP.csv')
sleep.date = pd.to_datetime(sleep.date)
sleep = sleep.drop_duplicates()
In [54]:
# Total sleep = deep + shallow, converted from minutes to hours.
sleep['sleep_time'] = (sleep['deepSleepTime'] + sleep['shallowSleepTime'])/60
In [55]:
#data is actually limited by miband to 2019 and 2020, here I will limit it to the same limit as before
# (i.e. the end of the mood diary, 2020-07-31)

sleep=sleep.query('date<="2020-07-31"')
In [56]:
# No pivot table strictly needed (dates are unique after de-duplication),
# but it keeps the code parallel with the money/steps sections.
sleep_pivot=sleep.pivot_table(index='date', values='sleep_time', aggfunc='mean')

# join daily sleep onto the mood table
mm_sl=mm.join(sleep_pivot, on='date')

# Missing nights appear two ways: as 0 (tracker recorded nothing for a
# present date) and as NaN (date absent from the join).  The previous
# np.where(sleep_time==0, median, ...) only replaced the zeros and let the
# NaNs through — the reason "fillna doesn't work" looked true.  Fill both
# with the same median.
median_sleep = mm_sl['sleep_time'].median()
mm_sl['sleep_time_right'] = mm_sl['sleep_time'].replace(0, median_sleep).fillna(median_sleep)
In [57]:
# Bar chart: average sleep hours at each mood level.
mm_sl.pivot_table(index='mood_float', values='sleep_time_right', aggfunc='mean').plot(kind='bar',xlabel='Mood', ylabel='Sleep',figsize=(12,8)).legend(['I do not care about sleeping']);
In [58]:
# Distribution (and outliers) of nightly sleep hours.
sleep.sleep_time.plot(kind='box')
Out[58]:
<AxesSubplot:>

Bonus - pulse

In [59]:
# Mi Band auto heart-rate export.  This file has no duplicate rows, so no
# drop_duplicates step is needed.
pulse = pd.read_csv('miband_data/HEARTRATE_AUTO.csv')

# Parse date and time-of-day columns, then order chronologically.
pulse.date = pd.to_datetime(pulse.date)
pulse.time = pd.to_datetime(pulse.time)
pulse = pulse.sort_values(by='date')
In [60]:
# Distribution of daily-average heart rate.
pulse.pivot_table(index='date', values='heartRate', aggfunc='mean').plot(kind='box')
Out[60]:
<AxesSubplot:>
In [61]:
# Mean daily heart rate (pivot kept for symmetry with the earlier sections).
pulse_pivot = pulse.pivot_table(index='date', values='heartRate', aggfunc='mean')

# Join onto the mood table.  Heart-rate coverage is sparse, so instead of
# imputing an average we drop the days without a reading.
mm_pu = mm.join(pulse_pivot, on='date').dropna()
In [62]:
# Bar chart: average heart rate at each mood level (y-axis zoomed to 71-81).
mm_pu.pivot_table(index='mood_float', values='heartRate', aggfunc='mean').plot(kind='bar',xlabel='Mood', ylabel='Pulse',figsize=(12,8)).set_ylim(71,81);
In [63]:
# Heart rate over the time axis — note pulse.time was parsed as a bare
# time-of-day, so this presumably shows an intraday profile; verify.
sns.lineplot(data=pulse, x="time", y="heartRate")
Out[63]:
<AxesSubplot:xlabel='time', ylabel='heartRate'>
In [64]:
# Initialize plotly offline mode so the interactive charts render in the
# saved notebook without a connection.
import plotly
plotly.offline.init_notebook_mode()